Note: This notebook produces about 40 MB of embedded output data, which should not be put under version control.
Please make sure to clear all cell output using the Cell>All Output>Clear command from the menu before committing changes!
Use Cell>Run all to reproduce the output.
In [ ]:
import numpy as np
import librosa
import mir_eval
import matplotlib.pyplot as plt
%matplotlib inline
import IPython.display
from IPython.display import display
import os, deepthought
from deepthought.datasets.openmiir.metadata import load_stimuli_metadata, save_beat_times
STIMULI_VERSION = 2 # change to 1 for older stimuli version
data_root = os.path.join(deepthought.DATA_PATH, 'OpenMIIR')
default_save_beat_times = False # change to True to save beat time to txt file
def play_beats(y, sr, beats):
    """Return an IPython audio widget that sonifies the given beats as clicks.

    Parameters
    ----------
    y : np.ndarray or None
        Audio signal to mix the clicks into; if None, only the clicks
        are rendered.
    sr : int
        Sampling rate of ``y`` and of the rendered click track.
    beats : np.ndarray
        Beat times in seconds (not frame indices).
    """
    if y is None:
        # Sonify the beats only.
        # BUG FIX: the original passed length=len(y) here, which raises a
        # TypeError because y is None in this branch; omit length and let
        # mir_eval size the click track from the beat times instead.
        y_beat = mir_eval.sonify.clicks(beats, sr)
    else:
        # Sonify the beats and mix them with the original signal
        y_beat = y + mir_eval.sonify.clicks(beats, sr, length=len(y))
    return IPython.display.Audio(data=y_beat, rate=sr)
def visualize(y, sr, title=None, playback=True, beats=None):
    """Display an audio playback widget and a mel power spectrogram for ``y``.

    Parameters
    ----------
    y : audio time series
    sr : sampling rate of ``y``
    title : optional label printed above the widget and used in the plot title
    playback : if True, show an IPython audio widget above the figure
    beats : optional beat positions given as *frame indices* (hop length 64);
        they are sonified for playback and drawn as vertical lines on the plot
    """
    # show playback widget above figure
    if playback:
        if title is not None:
            print title
        if beats is None:
            display(IPython.display.Audio(data=y, rate=sr))
        else:
            # beats are frame indices; convert to seconds before sonifying
            beat_times = librosa.frames_to_time(beats, sr=sr, hop_length=64)
            display(play_beats(y, sr, beat_times))
    # Let's make and display a mel-scaled power (energy-squared) spectrogram
    # We use a small hop length of 64 here so that the frames line up with the beat tracker example below.
    S = librosa.feature.melspectrogram(y, sr=sr, n_fft=2048, hop_length=64, n_mels=128)
    # Convert to log scale (dB). We'll use the peak power as reference.
    log_S = librosa.logamplitude(S, ref_power=np.max)
    # Make a new figure
    plt.figure(figsize=(12,4))
    # Display the spectrogram on a mel scale
    # sample rate and hop length parameters are used to render the time axis
    librosa.display.specshow(log_S, sr=sr, hop_length=64, x_axis='time', y_axis='mel')
    # Put a descriptive title on the plot
    if title is not None:
        plt.title('mel power spectrogram ({})'.format(title))
    if beats is not None:
        # Let's draw lines with a drop shadow on the beat events
        # NOTE(review): the x coordinates are frame indices while x_axis='time'
        # formats the tick labels -- this relies on the old librosa specshow
        # plotting in frame coordinates; confirm against the pinned version.
        plt.vlines(beats, 0, log_S.shape[0], colors='k', linestyles='-', linewidth=2.5)
        plt.vlines(beats, 0, log_S.shape[0], colors='w', linestyles='-', linewidth=1.5)
    # draw a color bar
    plt.colorbar(format='%+02.0f dB')
    # Make the figure layout compact
    plt.tight_layout()
    # This make sure the figures are plotted in place and not after text and audio
    plt.show()
    plt.close()
def _analyze_beats(audio_filepath, bpm, label=None, tightness=400, offset=0, duration=None, vy=True, vh=True, vp=True, vb=True):
print audio_filepath
# load audio file
# sr = 22050 # default
sr = 44100 # slower but gives better results for Harry Potter Theme
y, sr = librosa.load(audio_filepath, sr=sr, offset=offset, duration=duration)
if label is not None:
print label
if vy:
visualize(y, sr, 'original')
# split into harmonic and percussive component
y_harmonic, y_percussive = librosa.effects.hpss(y)
if vh:
visualize(y_harmonic, sr, 'harmonic component')
if vp:
visualize(y_percussive, sr, 'percussive component')
# Now, let's run the beat tracker
# We'll use the percussive component for this part
# By default, the beat tracker will trim away any leading or trailing beats that don't appear strong enough.
# To disable this behavior, call beat_track() with trim=False.
tempo, beats = librosa.beat.beat_track(y=y_percussive, sr=sr, hop_length=64, trim=False, start_bpm=bpm, tightness=tightness)
# Let's re-draw the spectrogram, but this time, overlay the detected beats
if vb:
visualize(y, sr, 'with beats', beats=beats)
print 'Offset: %.4f s' % offset
print 'Expected tempo: %.2f BPM' % bpm
print 'Estimated tempo: %.2f BPM' % tempo
print 'First 5 beat frames: ', beats[:5]
# Frame numbers are great and all, but when do those beats occur?
print 'First 5 beat times: ', librosa.frames_to_time(beats[:5], sr=sr, hop_length=64)
return tempo, beats, librosa.frames_to_time(beats, sr=sr, hop_length=64)
def get_audio_filepath(meta):
    """Build the path of the full-length stimulus audio file described by ``meta``."""
    version_dir = 'full.v{}'.format(STIMULI_VERSION)
    return os.path.join(data_root, 'audio', version_dir, meta['audio_file'])
def analyze_beats(meta, tightness=400, save=default_save_beat_times, **kwargs):
    """Run the beat analysis for the full stimulus described by ``meta``.

    The cue at the beginning of the recording is skipped by starting the
    analysis at meta['length_of_cue'] seconds. If ``save`` is True, the
    detected beat times are written to a text file.

    Returns the (tempo, beat_frames, beat_times) triple from _analyze_beats.
    """
    offset = meta['length_of_cue']  # skip the cue at the start of the file
    tempo, beat_frames, beat_times = _analyze_beats(
        audio_filepath=get_audio_filepath(meta),
        label=meta['label'],
        bpm=meta['bpm'],
        tightness=tightness,
        offset=offset,
        **kwargs
    )
    if save:
        save_beat_times(beat_times, stimulus_id=meta['id'], offset=offset, version=STIMULI_VERSION)
    return tempo, beat_frames, beat_times
In [ ]:
# NOTE: this is experimental
def analyze_onsets(meta):
    """Experimental: detect note onsets in the full stimulus audio for
    ``meta`` and overlay them on the spectrogram via visualize()."""
    audio_filepath=os.path.join(data_root, 'audio', 'full.v{}'.format(STIMULI_VERSION), meta['audio_file'])
    sr = 44100 # slower but gives better results for Harry Potter Theme
    # skip the cue at the beginning of the recording
    offset=meta['length_of_cue']
    duration=None
    print sr
    y, sr = librosa.load(audio_filepath, sr=sr, offset=offset, duration=duration)
    """
    # Get onset times from a signal
    onset_frames = librosa.onset.onset_detect(y=y, sr=sr, hop_length=64)
    onset_times = librosa.frames_to_time(onset_frames, sr, hop_length=64)
    # Or use a pre-computed onset envelope
    o_env = librosa.onset.onset_strength(y, sr=sr)
    onset_frames = librosa.onset.onset_detect(onset_envelope=o_env, sr=sr)
    onset_times = librosa.frames_to_time(onset_frames, sr, hop_length=64)
    """
    # variant 1: detect onsets directly from the signal at hop length 64,
    # matching the hop length used by visualize()
    onset_frames = librosa.onset.onset_detect(y=y, sr=sr, hop_length=64)
    print onset_frames
    visualize(y, sr, 'with beats', beats=onset_frames)
    # variant 2: detect onsets from a pre-computed onset strength envelope
    o_env = librosa.onset.onset_strength(y, sr=sr)
    plt.plot(o_env)
    onset_frames = librosa.onset.onset_detect(onset_envelope=o_env, sr=sr)
    print onset_frames
    # NOTE(review): onset_strength/onset_detect use their default hop length
    # here, which is larger than the 64 used by visualize(); the *7 factor
    # appears to rescale the frame indices to the hop-64 grid -- confirm the
    # exact ratio against the pinned librosa version.
    visualize(y, sr, 'with beats', beats=onset_frames*7)
In [ ]:
# load the stimuli metadata table (keyed by stimulus id)
meta = load_stimuli_metadata(data_root, version=STIMULI_VERSION)
# print meta
In [ ]:
# run this to analyze onsets for stimulus 22
analyze_onsets(meta[22])
In [ ]:
# run this to analyze beats for stimulus 1, specify tightness
tempo, beat_frames, beat_times = analyze_beats(meta[1], tightness=800)
print beat_times
print beat_frames
In [ ]:
# analyze beginning of stimulus 22,
# suppress visualization of original signal (vy) and harmonic (vh) and percussive (vp) component
_analyze_beats(get_audio_filepath(meta[22]),
               bpm=166, tightness=250, offset=2.182, duration=4.0, vy=False, vh=False, vp=False);
In [ ]:
# test different tightness settings on stimulus 22
_analyze_beats(get_audio_filepath(meta[22]),
               bpm=166, tightness=250, offset=0, duration=None, vy=False, vh=False, vp=False);
_analyze_beats(get_audio_filepath(meta[22]),
               bpm=166, tightness=400, offset=0, duration=None, vy=False, vh=False, vp=False);
_analyze_beats(get_audio_filepath(meta[22]),
               bpm=166, tightness=800, offset=0, duration=None, vy=False, vh=False, vp=False);
In [ ]:
# Beat analysis for each full stimulus, suppressing the intermediate
# visualizations; the tightness values were tuned per stimulus
# (stimulus 1 and 22 deviate from the default of 800).
_ = analyze_beats(meta[1], tightness=1000, vy=False, vh=False, vp=False)
In [ ]:
_ = analyze_beats(meta[2], tightness=800, vy=False, vh=False, vp=False)
In [ ]:
_ = analyze_beats(meta[3], tightness=800, vy=False, vh=False, vp=False)
In [ ]:
_ = analyze_beats(meta[4], tightness=800, vy=False, vh=False, vp=False)
In [ ]:
_ = analyze_beats(meta[11], tightness=800, vy=False, vh=False, vp=False)
In [ ]:
_ = analyze_beats(meta[12], tightness=800, vy=False, vh=False, vp=False)
In [ ]:
_ = analyze_beats(meta[13], tightness=800, vy=False, vh=False, vp=False)
In [ ]:
_ = analyze_beats(meta[14], tightness=800, vy=False, vh=False, vp=False)
In [ ]:
_ = analyze_beats(meta[21], tightness=800, vy=False, vh=False, vp=False)
In [ ]:
_ = analyze_beats(meta[22], tightness=300, vy=False, vh=False, vp=False)
In [ ]:
_ = analyze_beats(meta[23], tightness=800, vy=False, vh=False, vp=False)
In [ ]:
_ = analyze_beats(meta[24], tightness=800, vy=False, vh=False, vp=False)
In [ ]:
from deepthought.datasets.openmiir.constants import STIMULUS_IDS
# Analyze the beats of the cue version of every stimulus (separate audio
# file and tempo, taken from 'cue_file' and 'cue_bpm' in the metadata).
# The very high tightness keeps the tracker close to the expected tempo.
for stimulus_id in STIMULUS_IDS:
    tempo, beat_frames, beat_times = _analyze_beats(
        audio_filepath=os.path.join(data_root, 'audio', 'cues.v{}'.format(STIMULI_VERSION), meta[stimulus_id]['cue_file']),
        label=meta[stimulus_id]['label'],
        bpm=meta[stimulus_id]['cue_bpm'],
        tightness=10000, vy=False, vh=False, vp=False
    )
    # optionally write the cue beat times to a text file
    if default_save_beat_times:
        save_beat_times(beat_times, stimulus_id=stimulus_id, cue=True, version=STIMULI_VERSION)